{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import json\n",
    "import re\n",
    "import networkx as nx\n",
    "import matplotlib.pyplot as plt\n",
    "import csv\n",
    "import pandas as pd\n",
    "from community import community_louvain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "retweets = pd.read_csv('freedom_convoy_2022_01_06-2022_02_27_en.csv',lineterminator='\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. Export edges from Retweets\n",
    "retweets['original_twitter'] = retweets['text'].str.extract('RT @([a-zA-Z0-9]\\w{0,}):', expand=True)\n",
    "\n",
    "edges = retweets[['username', 'original_twitter']]\n",
    "edges.columns = ['Source', 'Target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "edges2 = edges.groupby(['Source','Target']).count()\n",
    "edges2 = edges2.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4217151"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(edges2['Source'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "edges2[['Source','Target']]=edges2[['Source','Target']].astype('str')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "g = nx.from_pandas_edgelist(edges2, 'Source','Target')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "G = nx.from_pandas_edgelist(edges2, 'Source', 'Target', create_using=nx.DiGraph)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df= pd.DataFrame(dict(\n",
    "    Degree = dict(G.degree),\n",
    "    Indegree = dict(G.in_degree),\n",
    "    Outdegree = dict(G.out_degree),\n",
    "    Modularity = community_louvain.best_partition(g)\n",
    "    )) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_index=df.reset_index().rename(columns={'index':'username'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = retweets.merge(df_index, on = 'username', how = 'left')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "result.to_csv('FreedomConvoy_SNA_jan06-feb27.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0     2307217\n",
       "4.0     1947324\n",
       "5.0     1373906\n",
       "1.0      668688\n",
       "2.0       38867\n",
       "3.0       38339\n",
       "10.0      35318\n",
       "15.0      26137\n",
       "7.0       23770\n",
       "12.0      11796\n",
       "Name: Modularity, dtype: int64"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.Modularity.value_counts().head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
